library(zoo)
library(lubridate)
library(mgcv)
library(TSA)
library(dynlm)
library(tseries)
library(vars)
library(rugarch)
library(forecast)
library(wesanderson)
library(knitr)
# Read in data, build a tidy frame, then aggregate to daily and hourly means.
full.data <- read.csv("jena_climate_2009_2016.csv")
full.data$Date.Time <- as.POSIXct(full.data$Date.Time, format="%d.%m.%Y %H:%M:%S")
full.data$Date <- as.Date(full.data$Date.Time)
full.data$Hour <- hour(full.data$Date.Time)

# BUG FIXES vs. original:
#  * `pressure <- full.data$p..mbar.` inside data.frame() assigned a stray
#    global variable and produced a mangled column name; `=` names the column.
#  * `hour` was set to the full timestamp (Date.Time) instead of the hour of
#    day, which made the "hourly" aggregation below group by every unique
#    timestamp rather than by (date, hour).
df <- data.frame(datetime = full.data$Date.Time,
                 date = full.data$Date,
                 hour = full.data$Hour,
                 pressure = full.data$p..mbar.,
                 temperature = full.data$Tpot..K.,
                 humidity = full.data$sh..g.kg.)

# Daily means of pressure/temperature/humidity (drop datetime, date, hour)
data.daily <- aggregate(df[-c(1,2,3)], list(df$date), mean)
colnames(data.daily) <- c("date", "pressure", "temperature", "humidity")
data.daily$month <- month(data.daily$date)

# Hourly means, grouped by calendar date and hour of day
data.hourly <- aggregate(df[-c(1,2,3)], list(df$date, df$hour), mean)
colnames(data.hourly) <- c("date", "hour", "pressure", "temperature", "humidity")
data.hourly$month <- month(data.hourly$date)

# Train/test split: first 2879 days for training, final 42 days (6 weeks) held out.
train.idx <- 1:2879
test.idx <- 2880:2921

daily.training.data <- data.daily[train.idx, ]
daily.training.date <- data.daily$date[train.idx]
daily.testing.data <- data.daily[test.idx, ]
daily.testing.date <- data.daily$date[test.idx]
daily.training.month <- data.daily$month[train.idx]
daily.testing.month <- data.daily$month[test.idx]

# Day-of-year of the first test observation; anchors the test ts objects below.
test.yday <- yday(daily.testing.date[1])

# Full-sample daily series (365-day seasonal frequency, starting 2009, day 1)
daily.pressure.ts <- ts(data.daily$pressure, frequency = 365, start = c(2009, 1))
daily.temperature.ts <- ts(data.daily$temperature, frequency = 365, start = c(2009, 1))
daily.humidity.ts <- ts(data.daily$humidity, frequency = 365, start = c(2009, 1))

# Training-period series
daily.pressure.train.ts <- ts(daily.training.data$pressure, frequency = 365, start = c(2009, 1))
daily.temp.train.ts <- ts(daily.training.data$temperature, frequency = 365, start = c(2009, 1))
daily.humidity.train.ts <- ts(daily.training.data$humidity, frequency = 365, start = c(2009, 1))

# Test-period series, anchored at the first test day of 2016
daily.pressure.test.ts <- ts(daily.testing.data$pressure, frequency = 365, start = c(2016, test.yday))
daily.temp.test.ts <- ts(daily.testing.data$temperature, frequency = 365, start = c(2016, test.yday))
daily.humidity.test.ts <- ts(daily.testing.data$humidity, frequency = 365, start = c(2016, test.yday))
# Plot each training series; the original wrapped the titles in a redundant
# paste(), which is a no-op on a single string.
ts.plot(daily.pressure.train.ts, type = "l", main = "pressure", ylab = "")

ts.plot(daily.temp.train.ts, type = "l", main = "temperature", ylab = "")

ts.plot(daily.humidity.train.ts, type = "l", main = "humidity", ylab = "")

# Auto- and cross-correlations of the three training columns (drop date, month)
stats::acf(as.matrix(daily.training.data[-c(1, 5)]))

Non-parametric trend and ANOVA monthly seasonality

# Normalized time index for the smooth trend term: values 0, 1/n, ..., (n-1)/n.
# seq_along() replaces the 1:length() anti-pattern; the arithmetic reproduces
# the original (t - min(t)) / max(t) for t = 1..n exactly.
daily.time.pts <- (seq_along(daily.training.date) - 1) / length(daily.training.date)

Pressure

# Pressure GAM: penalized-spline trend over the normalized time index plus a
# month-of-year ANOVA seasonality term (January is the reference level).
gam1.fitted <- gam(daily.pressure.train.ts ~ s(daily.time.pts) + factor(daily.training.month))
summary(gam1.fitted)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## daily.pressure.train.ts ~ s(daily.time.pts) + factor(daily.training.month)
## 
## Parametric coefficients:
##                                 Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)                    988.29162    0.50345 1963.024  < 2e-16 ***
## factor(daily.training.month)2   -0.72118    0.72098   -1.000 0.317262    
## factor(daily.training.month)3    2.35401    0.70459    3.341 0.000846 ***
## factor(daily.training.month)4    0.07709    0.71134    0.108 0.913712    
## factor(daily.training.month)5    0.32222    0.70677    0.456 0.648492    
## factor(daily.training.month)6    1.02128    0.71413    1.430 0.152798    
## factor(daily.training.month)7    0.53038    0.71017    0.747 0.455225    
## factor(daily.training.month)8    1.23343    0.71230    1.732 0.083448 .  
## factor(daily.training.month)9    2.52162    0.72038    3.500 0.000472 ***
## factor(daily.training.month)10   2.44909    0.71786    3.412 0.000655 ***
## factor(daily.training.month)11  -1.12845    0.72876   -1.548 0.121624    
## factor(daily.training.month)12   0.33730    0.73621    0.458 0.646873    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                     edf Ref.df     F p-value    
## s(daily.time.pts) 8.653  8.965 8.836  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0419   Deviance explained = 4.85%
## GCV = 61.873  Scale est. = 61.429    n = 2879

Temperature

# Temperature GAM: spline trend + monthly ANOVA seasonality (January baseline).
gam2.fitted <- gam(daily.temp.train.ts ~ s(daily.time.pts) + factor(daily.training.month))
summary(gam2.fitted)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## daily.temp.train.ts ~ s(daily.time.pts) + factor(daily.training.month)
## 
## Parametric coefficients:
##                                Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)                    274.0294     0.2670 1026.228  < 2e-16 ***
## factor(daily.training.month)2    0.7065     0.3824    1.848   0.0647 .  
## factor(daily.training.month)3    4.4256     0.3737   11.843  < 2e-16 ***
## factor(daily.training.month)4    9.8243     0.3773   26.041  < 2e-16 ***
## factor(daily.training.month)5   13.2875     0.3748   35.448  < 2e-16 ***
## factor(daily.training.month)6   16.4009     0.3788   43.302  < 2e-16 ***
## factor(daily.training.month)7   19.3199     0.3767   51.293  < 2e-16 ***
## factor(daily.training.month)8   18.4052     0.3778   48.717  < 2e-16 ***
## factor(daily.training.month)9   14.3539     0.3821   37.566  < 2e-16 ***
## factor(daily.training.month)10   9.0388     0.3808   23.738  < 2e-16 ***
## factor(daily.training.month)11   5.6022     0.3865   14.493  < 2e-16 ***
## factor(daily.training.month)12   1.8139     0.3905    4.645 3.55e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                   edf Ref.df     F p-value    
## s(daily.time.pts) 8.7  8.974 15.02  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.722   Deviance explained = 72.4%
## GCV = 17.403  Scale est. = 17.277    n = 2879

Humidity

# Humidity GAM: spline trend + monthly ANOVA seasonality (January baseline).
gam3.fitted <- gam(daily.humidity.train.ts ~ s(daily.time.pts) + factor(daily.training.month))
summary(gam3.fitted)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## daily.humidity.train.ts ~ s(daily.time.pts) + factor(daily.training.month)
## 
## Parametric coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     3.39232    0.09080  37.361  < 2e-16 ***
## factor(daily.training.month)2  -0.04639    0.13004  -0.357 0.721321    
## factor(daily.training.month)3   0.60623    0.12709   4.770 1.93e-06 ***
## factor(daily.training.month)4   1.73257    0.12830  13.504  < 2e-16 ***
## factor(daily.training.month)5   3.19271    0.12748  25.046  < 2e-16 ***
## factor(daily.training.month)6   4.82367    0.12880  37.450  < 2e-16 ***
## factor(daily.training.month)7   6.07194    0.12808  47.406  < 2e-16 ***
## factor(daily.training.month)8   5.76366    0.12846  44.866  < 2e-16 ***
## factor(daily.training.month)9   4.53389    0.12992  34.898  < 2e-16 ***
## factor(daily.training.month)10  2.72323    0.12946  21.035  < 2e-16 ***
## factor(daily.training.month)11  1.53642    0.13143  11.690  < 2e-16 ***
## factor(daily.training.month)12  0.48842    0.13278   3.678 0.000239 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                     edf Ref.df     F p-value    
## s(daily.time.pts) 8.602  8.954 12.97  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.701   Deviance explained = 70.3%
## GCV = 2.0129  Scale est. = 1.9985    n = 2879

Response: Based on the model outputs above we can conclude the following: for all 3 models the monthly seasonality is significant. Trend is also significant for all 3 models.

# Overlay the fitted trend+seasonality (purple) on the observed pressure series.
gam1.fitted.ts <- ts(fitted(gam1.fitted), frequency = 365, start = c(2009, 1))
plot(daily.pressure.train.ts, ylab = "Pressure", col = "black")
lines(gam1.fitted.ts, lwd = 1.5, col = "purple")
legend("topleft",
       legend = c("Pressure", "Non-parametric trend and monthly seasonality using ANOVA"),
       fill = c("black", "purple"), cex = 0.7, lty = 1)

# Overlay the fitted trend+seasonality (purple) on the observed temperature series.
gam2.fitted.ts <- ts(fitted(gam2.fitted), frequency = 365, start = c(2009, 1))
plot(daily.temp.train.ts, ylab = "Temperature", col = "black")
lines(gam2.fitted.ts, lwd = 1.5, col = "purple")
legend("topleft",
       legend = c("Temperature", "Non-parametric trend and monthly seasonality using ANOVA"),
       fill = c("black", "purple"), cex = 0.7, lty = 1)

# Overlay the fitted trend+seasonality (purple) on the observed humidity series.
gam3.fitted.ts <- ts(fitted(gam3.fitted), frequency = 365, start = c(2009, 1))
plot(daily.humidity.train.ts, ylab = "Humidity", col = "black")
lines(gam3.fitted.ts, lwd = 1.5, col = "purple")
legend("topleft",
       legend = c("Humidity", "Non-parametric trend and monthly seasonality using ANOVA"),
       fill = c("black", "purple"), cex = 0.7, lty = 1)

Response: Looking at the plots above, we can see that for all 3 time series we were able to capture the main pattern, especially it is visible for temperature and humidity time series that have clear seasonality patterns.

Residual Analysis

# Residual diagnostics for the pressure GAM: level plot, ACF, ACF of the
# squared residuals (heteroscedasticity check), and PACF.
par(mfrow = c(2, 2))
res1 <- gam1.fitted$residuals
ts.plot(ts(res1, frequency = 365, start = c(2009, 1)), main = "Pressure Residuals")
acf(res1, lag.max = 40, main = "ACF of Pressure Residuals")
acf(res1^2, lag.max = 40, main = "ACF of Squared Pressure Residuals")
pacf(res1, lag.max = 40, main = "PACF of Pressure Residuals")

# Stationarity tests on the pressure-GAM residuals:
# ADF (H0: unit root, i.e. non-stationary) at two lag orders, then KPSS
# (H0: level-stationary). Note: tseries truncates reported p-values
# (ADF floored at 0.01, KPSS capped at 0.1).
adf.test(gam1.fitted$residuals, k=1)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  gam1.fitted$residuals
## Dickey-Fuller = -23.051, Lag order = 1, p-value = 0.01
## alternative hypothesis: stationary
adf.test(gam1.fitted$residuals, k=5)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  gam1.fitted$residuals
## Dickey-Fuller = -16.966, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(gam1.fitted$residuals)
## 
##  KPSS Test for Level Stationarity
## 
## data:  gam1.fitted$residuals
## KPSS Level = 0.018621, Truncation lag parameter = 9, p-value = 0.1

Response: Looking at the time series plot, the variance seems to be somewhat constant; however, the ACF of the squared residuals shows some correlation. On the ACF plot we see that most of the values after lag 7 are within the confidence band, which indicates stationarity. The results of the Augmented Dickey-Fuller test and the KPSS test also suggest stationary data. To conclude, based on the time series plot, the ACF plot, and the stationarity tests, the residuals of the first model are stationary and heteroscedasticity is present.

# Residual diagnostics for the temperature GAM: level plot, ACF, ACF of the
# squared residuals (heteroscedasticity check), and PACF.
par(mfrow = c(2, 2))
res2 <- gam2.fitted$residuals
ts.plot(ts(res2, frequency = 365, start = c(2009, 1)), main = "Temperature Residuals")
acf(res2, lag.max = 40, main = "ACF of Temperature Residuals")
acf(res2^2, lag.max = 40, main = "ACF of Squared Temperature Residuals")
pacf(res2, lag.max = 40, main = "PACF of Temperature Residuals")

# Stationarity tests on the temperature-GAM residuals: ADF (H0: unit root)
# at two lag orders, then KPSS (H0: level-stationary). Reported p-values are
# truncated by tseries (ADF floored at 0.01, KPSS capped at 0.1).
adf.test(gam2.fitted$residuals, k=1)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  gam2.fitted$residuals
## Dickey-Fuller = -20.712, Lag order = 1, p-value = 0.01
## alternative hypothesis: stationary
adf.test(gam2.fitted$residuals, k=5)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  gam2.fitted$residuals
## Dickey-Fuller = -15.875, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(gam2.fitted$residuals)
## 
##  KPSS Test for Level Stationarity
## 
## data:  gam2.fitted$residuals
## KPSS Level = 0.013906, Truncation lag parameter = 9, p-value = 0.1

Response: Looking at the time series plot, the variance seems to be constant, and the squared residuals show the same result. On the ACF plot we see that most of the values after lag 8 are within the confidence band, which indicates stationarity; however, there might be a seasonal pattern left in the residuals. The results of the Augmented Dickey-Fuller test and the KPSS test suggest stationary data. Based on the time series plot, the ACF plot, and the stationarity tests, we conclude that the residuals of the second model seem to be stationary and the variance seems to be constant.

# Residual diagnostics for the humidity GAM: level plot, ACF, ACF of the
# squared residuals (heteroscedasticity check), and PACF.
par(mfrow = c(2, 2))
res3 <- gam3.fitted$residuals
ts.plot(ts(res3, frequency = 365, start = c(2009, 1)), main = "Humidity Residuals")
acf(res3, lag.max = 40, main = "ACF of Humidity Residuals")
acf(res3^2, lag.max = 40, main = "ACF of Squared Humidity Residuals")
pacf(res3, lag.max = 40, main = "PACF of Humidity Residuals")

# Stationarity tests on the humidity-GAM residuals: ADF (H0: unit root)
# at two lag orders, then KPSS (H0: level-stationary). Reported p-values are
# truncated by tseries (ADF floored at 0.01, KPSS capped at 0.1).
adf.test(gam3.fitted$residuals, k=1)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  gam3.fitted$residuals
## Dickey-Fuller = -24.121, Lag order = 1, p-value = 0.01
## alternative hypothesis: stationary
adf.test(gam3.fitted$residuals, k=5)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  gam3.fitted$residuals
## Dickey-Fuller = -17.022, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(gam3.fitted$residuals)
## 
##  KPSS Test for Level Stationarity
## 
## data:  gam3.fitted$residuals
## KPSS Level = 0.010698, Truncation lag parameter = 9, p-value = 0.1

Response: Looking at the time series plot and the ACF plot of the squared residuals, we conclude that the variance isn’t constant. On the ACF plot we see a seasonal pattern, which suggests non-stationarity; however, the Augmented Dickey-Fuller test and the KPSS test both suggest stationary data, so there is not enough evidence to conclude that the data are non-stationary. To conclude, based on the time series plot, the ACF plot, and the stationarity tests, the residuals of the third model are close to stationary, but we cannot say for sure. There is also heteroscedasticity in the residuals.

Predictions

Pressure

# Rolling one-week-ahead GAM forecasts over the 6-week test horizon.
# Each iteration refits on everything observed so far (training data plus any
# earlier test weeks) and predicts the next 7 days.
prediction.h <- c()
prediction.p <- c()
prediction.t <- c()

for (wk in 1:6) {
  fit.data <- daily.training.data
  week.rows <- (1 + (wk - 1)*7):(wk*7)
  pred.data <- daily.testing.data[week.rows, ]
  if (wk >= 2) {
    # earlier test weeks are now "observed": append them to the fitting window
    fit.data <- rbind(fit.data, daily.testing.data[1:((wk - 1)*7), ])
  }
  # Normalized time index over the full daily sample, split between the
  # fitting window and the 7 days being predicted.
  tp <- 1:nrow(data.daily)
  tp <- (tp - min(tp))/max(tp)
  fit.data$tps <- tp[1:nrow(fit.data)]
  pred.data$tps <- tp[(nrow(fit.data) + 1):(nrow(fit.data) + 7)]

  gam.p <- gam(pressure ~ s(tps) + factor(month), data = fit.data)
  prediction.p <- c(prediction.p, predict.gam(gam.p, pred.data))

  gam.t <- gam(temperature ~ s(tps) + factor(month), data = fit.data)
  prediction.t <- c(prediction.t, predict.gam(gam.t, pred.data))

  gam.h <- gam(humidity ~ s(tps) + factor(month), data = fit.data)
  prediction.h <- c(prediction.h, predict.gam(gam.h, pred.data))
}

# Collect the weekly GAM forecasts alongside the test dates and months.
gam.prediction <- data.frame(
  date = daily.testing.data$date,
  pressure = prediction.p,
  temperature = prediction.t,
  humidity = prediction.h,
  month = daily.testing.data$month
)

# Observed series (black line) vs GAM forecasts (red points) over the test
# window; par(new = TRUE) overlays the second plot on the same axes/ylim.
plot(daily.testing.data$date, daily.testing.data$pressure, type = "l",
     ylim = c(980, 1020), main = "Pressure Prediction", ylab = "Pressure", xlab = "Date")
par(new = TRUE)
plot(daily.testing.data$date, prediction.p, pch = 1, col = "red",
     ylim = c(980, 1020), ylab = "", cex = 0.6, xlab = "Date")

plot(daily.testing.data$date, daily.testing.data$temperature, type = "l",
     ylim = c(268, 285), main = "Temperature Prediction", ylab = "Temperature", xlab = "Date")
par(new = TRUE)
plot(daily.testing.data$date, prediction.t, pch = 1, col = "red",
     ylim = c(268, 285), ylab = "", cex = 0.8, xlab = "Date")

plot(daily.testing.data$date, daily.testing.data$humidity, type = "l",
     ylim = c(2, 6), main = "Humidity Prediction", ylab = "Humidity", xlab = "Date")
par(new = TRUE)
plot(daily.testing.data$date, prediction.h, pch = 1, col = "red",
     ylim = c(2, 6), ylab = "", cex = 0.8, xlab = "Date")

ARMA modeling

Pressure

# Number of test observations, and annual harmonic regressors (one sine/cosine
# pair with period 12) built from the month index for train, test, and both.
nfit <- length(daily.testing.date)
harmonic.train <- cbind(sin(daily.training.month*2*pi/12),
                        cos(daily.training.month*2*pi/12))
harmonic.test <- cbind(sin(daily.testing.month*2*pi/12),
                       cos(daily.testing.month*2*pi/12))
harmonic.all <- rbind(harmonic.train, harmonic.test)
## Fit an ARMA model to the residuals

#Iterative function
# Fit a regression-with-ARIMA(p,d,q)-errors model and report its AIC as a
# one-row data frame (used by the commented-out order grid searches below).
# `xreg` is exposed as an argument — defaulting to the training harmonics for
# backward compatibility — so the helper works with any regressor matrix.
test_modelA <- function(p, d, q, train.data, xreg = as.matrix(harmonic.train)) {
  mod <- Arima(train.data, order = c(p, d, q), xreg = xreg)
  current.aic <- AIC(mod)
  df <- data.frame(p, d, q, current.aic)
  names(df) <- c("p", "d", "q", "AIC")
  return(df)
}
#orders = data.frame(Inf,Inf,Inf, Inf)
#names(orders) <- c("p","d","q","AIC")

#Orders for Arima for Pressure
#for (p in 0:9){
#    for (q in 0:9) {
#      for (d in 0:1) {
#          possibleError <- tryCatch(
#          orders<-rbind(orders,test_modelA(p,d,q,daily.pressure.train.ts)),
#          error=function(e) e
#          )
#          if(inherits(possibleError, "error")) next
#      }
#  }
#}
#orders <- orders[order(-orders$AIC),]
#tail(orders,5)
# Selected pressure model: regression with ARIMA(4,1,6) errors and the
# harmonic month regressors as external covariates.
arma.pressure <- Arima(daily.pressure.train.ts,order=c(4,1,6),xreg=as.matrix(harmonic.train))
arma.pressure
## Series: daily.pressure.train.ts 
## Regression with ARIMA(4,1,6) errors 
## 
## Coefficients:
##          ar1      ar2     ar3     ar4      ma1     ma2      ma3      ma4
##       0.2212  -0.3910  0.0810  0.3321  -0.1713  0.0267  -0.2155  -0.5524
## s.e.  0.2714   0.2003  0.2358  0.1684   0.2713  0.1822   0.0969   0.1548
##           ma5     ma6    xreg1    xreg2
##       -0.1592  0.0746  -0.7215  -0.5604
## s.e.   0.0968  0.0282   0.5014   0.5009
## 
## sigma^2 = 21.21:  log likelihood = -8475.3
## AIC=16976.6   AICc=16976.72   BIC=17054.14
## compute the test statistics
# Approximate z-statistics: each coefficient divided by its standard error
# (square roots of the diagonal of the estimated covariance matrix).
testval <-
  as.numeric(arma.pressure$coef) / as.numeric(sqrt(diag(arma.pressure$var.coef)))

## print t-statistic
print(testval)
##  [1]  0.8152592 -1.9522947  0.3435231  1.9718864 -0.6316090  0.1468291
##  [7] -2.2234923 -3.5676879 -1.6447554  2.6449889 -1.4390824 -1.1187279
## print p-values
# Two-sided p-values from the standard normal approximation
print(2*(1-pnorm(abs(testval))))
##  [1] 0.4149239385 0.0509032318 0.7312049414 0.0486225735 0.5276423839
##  [6] 0.8832668805 0.0261826206 0.0003601451 0.1000202680 0.0081693601
## [11] 0.1501271656 0.2632562195
## print significant coefficients for significance level 0.05
which(abs(testval)>qnorm((1-0.05/2)))
## [1]  4  7  8 10
# Residual diagnostics: level plot, ACF, ACF of the squares
# (heteroscedasticity check), and a normal Q-Q plot.
par(mfrow=c(2,2))
ts.plot(ts(residuals(arma.pressure), frequency = 365, start = c(2009,1)))
acf(residuals(arma.pressure), lag.max = 40)
acf(residuals(arma.pressure)^2, lag.max = 40)
qqnorm(residuals(arma.pressure))
qqline(residuals(arma.pressure))

#perform shapiro-wilk test (H0: residuals are normally distributed)
shapiro.test(residuals(arma.pressure))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(arma.pressure)
## W = 0.99092, p-value = 1.518e-12

Temperature

#orders = data.frame(Inf,Inf,Inf, Inf)
#names(orders) <- c("p","d","q","AIC")

#Orders for Arima for Temperature
#for (p in 0:9){
#    for (q in 0:9) {
#      for (d in 0:1) {
#          possibleError <- tryCatch(
#          orders<-rbind(orders,test_modelA(p,d,q,daily.temp.train.ts)),
#          error=function(e) e
#          )
#          if(inherits(possibleError, "error")) next
#      }
#  }
#}
#orders <- orders[order(-orders$AIC),]
#tail(orders,5)
# Selected temperature model: regression with ARIMA(8,1,4) errors and the
# harmonic month regressors as external covariates.
arma.temp <- Arima(daily.temp.train.ts,order=c(8,1,4),xreg=as.matrix(harmonic.train))
arma.temp
## Series: daily.temp.train.ts 
## Regression with ARIMA(8,1,4) errors 
## 
## Coefficients:
##          ar1     ar2     ar3      ar4     ar5      ar6      ar7      ar8
##       0.1733  1.2400  0.1580  -0.7332  0.2136  -0.0118  -0.0170  -0.0406
## s.e.  0.0240  0.0249  0.0309   0.0322  0.0304   0.0301   0.0203   0.0200
##           ma1      ma2      ma3     ma4    xreg1    xreg2
##       -0.1918  -1.4737  -0.2645  0.9386  -0.9585  -0.9762
## s.e.   0.0154   0.0159   0.0117  0.0169   0.6957   0.7700
## 
## sigma^2 = 6.214:  log likelihood = -6706
## AIC=13442   AICc=13442.16   BIC=13531.47
## compute the test statistics
# Approximate z-statistics: coefficient estimates over their standard errors
testval <-
  as.numeric(arma.temp$coef) / as.numeric(sqrt(diag(arma.temp$var.coef)))

## print t-statistic
print(testval)
##  [1]   7.2344270  49.7931149   5.1078425 -22.7548978   7.0342553  -0.3934796
##  [7]  -0.8384977  -2.0271002 -12.4184156 -92.5717346 -22.6612535  55.5215764
## [13]  -1.3776419  -1.2679162
## print p-values
# Two-sided p-values from the standard normal approximation
print(2*(1-pnorm(abs(testval))))
##  [1] 4.674039e-13 0.000000e+00 3.258580e-07 0.000000e+00 2.003286e-12
##  [6] 6.939653e-01 4.017512e-01 4.265216e-02 0.000000e+00 0.000000e+00
## [11] 0.000000e+00 0.000000e+00 1.683139e-01 2.048279e-01
## print significant coefficients for significance level 0.05
which(abs(testval)>qnorm((1-0.05/2)))
##  [1]  1  2  3  4  5  8  9 10 11 12
# Residual diagnostics: level plot, ACF, ACF of the squares, normal Q-Q plot
par(mfrow=c(2,2))
ts.plot(ts(residuals(arma.temp), frequency = 365, start = c(2009,1)))
acf(residuals(arma.temp), lag.max = 40)
acf(residuals(arma.temp)^2, lag.max = 40)
qqnorm(residuals(arma.temp))
qqline(residuals(arma.temp))

#perform shapiro-wilk test (H0: residuals are normally distributed)
shapiro.test(residuals(arma.temp))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(arma.temp)
## W = 0.99711, p-value = 2.872e-05

Humidity

#Orders for Arima for Humidity
#orders = data.frame(Inf,Inf,Inf, Inf)
#names(orders) <- c("p","d","q","AIC")

#for (p in 0:9){
#    for (q in 0:9) {
#      for (d in 0:1) {
#          possibleError <- tryCatch(
#          orders<-rbind(orders,test_modelA(p,d,q,daily.humidity.train.ts)),
#          error=function(e) e
#          )
#          if(inherits(possibleError, "error")) next
#      }
#  }
#}
#orders <- orders[order(-orders$AIC),]
#tail(orders,5)
# Selected humidity model: regression with ARIMA(5,0,1) errors and the
# harmonic month regressors as external covariates.
arma.humidity <- Arima(daily.humidity.train.ts,order=c(5,0,1),xreg=as.matrix(harmonic.train))
arma.humidity
## Series: daily.humidity.train.ts 
## Regression with ARIMA(5,0,1) errors 
## 
## Coefficients:
##          ar1      ar2     ar3      ar4      ar5      ma1  intercept    xreg1
##       1.8894  -1.1621  0.3121  -0.0359  -0.0081  -0.9365     5.9825  -1.2126
## s.e.  0.0233   0.0418  0.0452   0.0402   0.0198   0.0175     0.2317   0.2901
##         xreg2
##       -1.3177
## s.e.   0.3220
## 
## sigma^2 = 0.8555:  log likelihood = -3856.68
## AIC=7733.36   AICc=7733.44   BIC=7793.01
## compute the test statistics
# Approximate z-statistics: coefficient estimates over their standard errors
testval <-
  as.numeric(arma.humidity$coef) / as.numeric(sqrt(diag(arma.humidity$var.coef)))

## print t-statistic
print(testval)
## [1]  81.1055562 -27.8004696   6.9094195  -0.8926125  -0.4083381 -53.3762094
## [7]  25.8197147  -4.1806021  -4.0918830
## print p-values
# Two-sided p-values from the standard normal approximation
print(2*(1-pnorm(abs(testval))))
## [1] 0.000000e+00 0.000000e+00 4.866330e-12 3.720647e-01 6.830255e-01
## [6] 0.000000e+00 0.000000e+00 2.907382e-05 4.278845e-05
## print significant coefficients for significance level 0.05
which(abs(testval)>qnorm((1-0.05/2)))
## [1] 1 2 3 6 7 8 9
# Residual diagnostics: level plot, ACF, ACF of the squares, normal Q-Q plot
par(mfrow=c(2,2))
ts.plot(ts(residuals(arma.humidity), frequency = 365, start = c(2009,1)))
acf(residuals(arma.humidity), lag.max = 40)
acf(residuals(arma.humidity)^2, lag.max = 40)
qqnorm(residuals(arma.humidity))
qqline(residuals(arma.humidity))

#perform shapiro-wilk test (H0: residuals are normally distributed)
shapiro.test(residuals(arma.humidity))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(arma.humidity)
## W = 0.98891, p-value = 3.419e-14

Results

For all 3 models we see similar results: 1) the ACF plot of the residuals indicates that the residuals aren’t correlated; 2) the ACF plot of the squared residuals indicates that the squared residuals are correlated, so we conclude that heteroskedasticity is present in the residuals; 3) the Shapiro-Wilk test rejects the null hypothesis of normality, and the QQ plot shows heavier tails than a normal distribution. We conclude that the residuals do not follow a normal distribution, which is assumed when fitting the model by MLE, so our models aren’t a good fit.

Prediction

# Rolling one-week-ahead ARMAX forecasts over the 6 test weeks, with 95%
# normal prediction intervals (pred +/- 1.96 * se). Each iteration refits on
# the training data plus any earlier test weeks.
# IMPROVEMENT vs. original: results are accumulated in preallocated lists and
# bound once at the end, replacing the Inf-sentinel first row plus
# rbind-in-a-loop (grow-in-loop) anti-pattern and the later `[-1, ]` cleanup.
pred.list.p <- vector("list", 6)
pred.list.t <- vector("list", 6)
pred.list.h <- vector("list", 6)

for (i in 1:6) {
  data <- daily.training.data
  new.xreg <- harmonic.test[(1 + (i - 1)*7):(i*7), ]
  if (i >= 2) {
    # previously forecast weeks are now observed: extend the fitting window
    data <- rbind(data, daily.testing.data[1:((i - 1)*7), ])
  }

  # Pressure: ARIMA(4,1,6) with harmonic regressors
  arma.p <- Arima(data$pressure, order = c(4, 1, 6),
                  xreg = as.matrix(harmonic.all[1:nrow(data), ]))
  arma.pred.p <- predict(arma.p, n.ahead = 7, newxreg = as.matrix(new.xreg))
  pred.list.p[[i]] <- data.frame(prediction = arma.pred.p$pred,
                                 lower = arma.pred.p$pred - 1.96 * arma.pred.p$se,
                                 upper = arma.pred.p$pred + 1.96 * arma.pred.p$se)

  # Temperature: ARIMA(8,1,4) with harmonic regressors
  arma.t <- Arima(data$temperature, order = c(8, 1, 4),
                  xreg = as.matrix(harmonic.all[1:nrow(data), ]))
  arma.pred.t <- predict(arma.t, n.ahead = 7, newxreg = as.matrix(new.xreg))
  pred.list.t[[i]] <- data.frame(prediction = arma.pred.t$pred,
                                 lower = arma.pred.t$pred - 1.96 * arma.pred.t$se,
                                 upper = arma.pred.t$pred + 1.96 * arma.pred.t$se)

  # Humidity: ARIMA(5,0,1) with harmonic regressors
  arma.h <- Arima(data$humidity, order = c(5, 0, 1),
                  xreg = as.matrix(harmonic.all[1:nrow(data), ]))
  arma.pred.h <- predict(arma.h, n.ahead = 7, newxreg = as.matrix(new.xreg))
  pred.list.h[[i]] <- data.frame(prediction = arma.pred.h$pred,
                                 lower = arma.pred.h$pred - 1.96 * arma.pred.h$se,
                                 upper = arma.pred.h$pred + 1.96 * arma.pred.h$se)
}

# Bind the six weekly blocks and attach the test dates
prediction.p <- do.call(rbind, pred.list.p)
prediction.p$date <- daily.testing.data$date

prediction.t <- do.call(rbind, pred.list.t)
prediction.t$date <- daily.testing.data$date

prediction.h <- do.call(rbind, pred.list.h)
prediction.h$date <- daily.testing.data$date

armax.prediction <- list(prediction.p, prediction.t, prediction.h)
save(armax.prediction, file = 'armax-prediction.RData')

Prediction Plots

load('armax-prediction.RData')
prediction.p <- armax.prediction[[1]]
prediction.t <- armax.prediction[[2]]
prediction.h <- armax.prediction[[3]]


# Overlay observed series (black), point forecast (red) and 95% interval
# bounds (blue) over the test window; par(new = TRUE) with a shared ylim
# redraws each curve on the same axes.
plot.armax <- function(actual, pred, ylim, title, ylab) {
  dates <- daily.testing.data$date
  plot(dates, actual, type = 'l', ylim = ylim, main = title, ylab = ylab, xlab = 'Date')
  par(new = TRUE)
  plot(dates, pred$prediction, type = 'l', col = 'red', ylim = ylim, ylab = '', cex = 0.6, xlab = 'Date')
  par(new = TRUE)
  plot(dates, pred$lower, type = 'l', col = 'blue', ylim = ylim, ylab = '', cex = 0.6, xlab = 'Date')
  par(new = TRUE)
  plot(dates, pred$upper, type = 'l', col = 'blue', ylim = ylim, ylab = '', cex = 0.6, xlab = 'Date')
}

plot.armax(daily.testing.data$pressure, prediction.p, c(970, 1020), 'Pressure Prediction', 'Pressure')

plot.armax(daily.testing.data$temperature, prediction.t, c(264, 289), 'Temperature Prediction', 'Temperature')

plot.armax(daily.testing.data$humidity, prediction.h, c(0.5, 8), 'Humidity Prediction', 'Humidity')

ARMA-GARCH for modeling pressure

# Grid-search the GARCH(m,n) order for pressure, holding the mean model fixed
# at the ARMA orders carried over from the ARIMA(4,1,6) fit.
# BUG FIX: rugarch's armaOrder is c(p, q) — the original passed the full ARIMA
# triple c(4,1,6), whose third element was silently ignored, so it actually
# fitted ARMA(4,1). We pass the intended ARMA(4,6); note rugarch does not
# difference the series.
test_modelAGG <- function(m,n){
    spec <- ugarchspec(variance.model=list(garchOrder=c(m,n)),
                       mean.model=list(armaOrder=c(4,6),
                                       external.regressors = as.matrix(harmonic.train),
                                       include.mean=TRUE), distribution.model="std")
    fit <- ugarchfit(spec, daily.pressure.train.ts, solver = 'hybrid')
    current.aic <- infocriteria(fit)[1]
    df <- data.frame(m,n,current.aic)
    names(df) <- c("m","n","AIC")
    return(df)
}

ordersAGG <- data.frame(Inf,Inf,Inf)
names(ordersAGG) <- c("m","n","AIC")

# Try every GARCH order in 0..2 x 0..2, skipping combinations that fail to fit
for (m in 0:2){
    for (n in 0:2){
        possibleError <- tryCatch(
            ordersAGG<-rbind(ordersAGG,test_modelAGG(m,n)),
            error=function(e) e
        )
        if(inherits(possibleError, "error")) next
    }
}
ordersAGG <- ordersAGG[order(-ordersAGG$AIC),]
tail(ordersAGG)
# Update the ARMA order holding the GARCH order fixed at (1,1) — the order the
# code actually uses (the original comment said (2,2), contradicting the code).
test_modelAGA <- function(p,q){
    spec <- ugarchspec(variance.model=list(garchOrder=c(1,1)),
                       mean.model=list(armaOrder=c(p,q),
                                       external.regressors = as.matrix(harmonic.train),
                                       include.mean=TRUE),
                       distribution.model="std")
    fit <- ugarchfit(spec, daily.pressure.train.ts, solver = 'hybrid')
    current.aic <- infocriteria(fit)[1]
    df <- data.frame(p,q,current.aic)
    names(df) <- c("p","q","AIC")
    return(df)
}

ordersAGA <- data.frame(Inf,Inf,Inf)
names(ordersAGA) <- c("p","q","AIC")
# BUG FIXES: the original inner loop began with a stray literal `1`, and —
# unlike the other grid searches — had no error handling, so a single failing
# (p,q) combination would abort the entire search. Wrap each fit in tryCatch.
for (p in 0:7){
    for (q in 0:7){
        possibleError <- tryCatch(
            ordersAGA<-rbind(ordersAGA,test_modelAGA(p,q)),
            error=function(e) e
        )
        if(inherits(possibleError, "error")) next
    }
}
ordersAGA <- ordersAGA[order(-ordersAGA$AIC),]
tail(ordersAGA)
# Re-search the GARCH order, now with the updated mean model ARMA(6,5)
test_modelAGG <- function(m, n) {
    mean.spec <- list(armaOrder = c(6, 5),
                      external.regressors = as.matrix(harmonic.train),
                      include.mean = TRUE)
    spec <- ugarchspec(variance.model = list(garchOrder = c(m, n)),
                       mean.model = mean.spec,
                       distribution.model = "std")
    fit <- ugarchfit(spec, daily.pressure.train.ts, solver = 'hybrid')
    out <- data.frame(m = m, n = n, AIC = infocriteria(fit)[1])
    return(out)
}

ordersAGG <- data.frame(m = Inf, n = Inf, AIC = Inf)

# Try every GARCH order in 0..2 x 0..2, skipping combinations that fail to fit
for (m in 0:2) {
    for (n in 0:2) {
        res <- tryCatch(test_modelAGG(m, n), error = function(e) e)
        if (inherits(res, "error")) next
        ordersAGG <- rbind(ordersAGG, res)
    }
}
ordersAGG <- ordersAGG[order(-ordersAGG$AIC), ]
tail(ordersAGG)

The final iteration shows that the optimal orders for modeling the pressure are (6,5)x(1,1).

# Final pressure model: ARMA(6,5) mean with harmonic month regressors plus a
# GARCH(1,1) variance equation and Student-t innovations.
spec.1 <- ugarchspec(variance.model = list(garchOrder = c(1, 1)),
                     mean.model = list(armaOrder = c(6, 5),
                                       external.regressors = as.matrix(harmonic.train),
                                       include.mean = TRUE),
                     distribution.model = "std")
pressure.arma.garch <- ugarchfit(spec.1, daily.pressure.train.ts, solver = 'hybrid')
pressure.arma.garch
## 
## *---------------------------------*
## *          GARCH Model Fit        *
## *---------------------------------*
## 
## Conditional Variance Dynamics    
## -----------------------------------
## GARCH Model  : sGARCH(1,1)
## Mean Model   : ARFIMA(6,0,5)
## Distribution : std 
## 
## Optimal Parameters
## ------------------------------------
##          Estimate  Std. Error     t value Pr(>|t|)
## mu     989.401851    0.091700 10789.49782 0.000000
## ar1      1.211094    0.000159  7604.32367 0.000000
## ar2     -1.446503    0.000164 -8826.57850 0.000000
## ar3      0.825087    0.000105  7827.93749 0.000000
## ar4     -0.215320    0.000049 -4413.96939 0.000000
## ar5      0.157929    0.000043  3702.87930 0.000000
## ar6     -0.062584    0.000022 -2908.71669 0.000000
## ma1     -0.162302    0.000077 -2096.15922 0.000000
## ma2      0.840706    0.000521  1614.89465 0.000000
## ma3      0.302594    0.000150  2013.04310 0.000000
## ma4      0.089584    0.000144   621.09398 0.000000
## ma5     -0.061357    0.000032 -1891.60261 0.000000
## mxreg1  -0.734180    0.022781   -32.22758 0.000000
## mxreg2   0.098535    0.014506     6.79279 0.000000
## omega    0.403571    1.367313     0.29516 0.767875
## alpha1   0.061882    0.138446     0.44698 0.654893
## beta1    0.920318    0.194140     4.74049 0.000002
## shape   11.774023    2.715305     4.33617 0.000014
## 
## Robust Standard Errors:
##          Estimate  Std. Error     t value Pr(>|t|)
## mu     989.401851    5.348159  184.998592  0.00000
## ar1      1.211094    0.005484  220.832236  0.00000
## ar2     -1.446503    0.006723 -215.171612  0.00000
## ar3      0.825087    0.004088  201.849016  0.00000
## ar4     -0.215320    0.001774 -121.398837  0.00000
## ar5      0.157929    0.001588   99.435319  0.00000
## ar6     -0.062584    0.000719  -86.984137  0.00000
## ma1     -0.162302    0.002120  -76.564650  0.00000
## ma2      0.840706    0.025542   32.914218  0.00000
## ma3      0.302594    0.006169   49.051932  0.00000
## ma4      0.089584    0.006321   14.172753  0.00000
## ma5     -0.061357    0.000838  -73.224680  0.00000
## mxreg1  -0.734180    1.171891   -0.626492  0.53099
## mxreg2   0.098535    0.812618    0.121256  0.90349
## omega    0.403571   66.086606    0.006107  0.99513
## alpha1   0.061882    6.754111    0.009162  0.99269
## beta1    0.920318    9.448194    0.097407  0.92240
## shape   11.774023   75.153613    0.156666  0.87551
## 
## LogLikelihood : -8340.529 
## 
## Information Criteria
## ------------------------------------
##                    
## Akaike       5.8066
## Bayes        5.8438
## Shibata      5.8065
## Hannan-Quinn 5.8200
## 
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
##                          statistic p-value
## Lag[1]                     0.05628 0.81248
## Lag[2*(p+q)+(p+q)-1][32]  17.36195 0.07254
## Lag[4*(p+q)+(p+q)-1][54]  30.31513 0.22666
## d.o.f=11
## H0 : No serial correlation
## 
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
##                         statistic p-value
## Lag[1]                      2.973 0.08467
## Lag[2*(p+q)+(p+q)-1][5]     4.139 0.23722
## Lag[4*(p+q)+(p+q)-1][9]     4.615 0.48804
## d.o.f=2
## 
## Weighted ARCH LM Tests
## ------------------------------------
##             Statistic Shape Scale P-Value
## ARCH Lag[3]  0.008066 0.500 2.000  0.9284
## ARCH Lag[5]  0.087844 1.440 1.667  0.9890
## ARCH Lag[7]  0.248433 2.315 1.543  0.9952
## 
## Nyblom stability test
## ------------------------------------
## Joint Statistic:  10.8088
## Individual Statistics:              
## mu     0.03189
## ar1    0.06254
## ar2    0.03383
## ar3    0.03848
## ar4    0.06025
## ar5    0.01822
## ar6    0.06626
## ma1    0.02106
## ma2    0.04208
## ma3    0.03860
## ma4    0.03483
## ma5    0.04281
## mxreg1 0.03263
## mxreg2 0.03129
## omega  0.04452
## alpha1 0.02901
## beta1  0.04661
## shape  0.11154
## 
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic:          3.83 4.14 4.73
## Individual Statistic:     0.35 0.47 0.75
## 
## Sign Bias Test
## ------------------------------------
##                    t-value     prob sig
## Sign Bias           1.1227 0.261668    
## Negative Sign Bias  1.6181 0.105754    
## Positive Sign Bias  0.1311 0.895674    
## Joint Effect       13.0328 0.004566 ***
## 
## 
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
##   group statistic p-value(g-1)
## 1    20     26.34      0.12112
## 2    30     33.69      0.25085
## 3    40     53.14      0.06499
## 4    50     58.88      0.15768
## 
## 
## Elapsed time : 1.286806
# Residual diagnostics for the pressure ARMA-GARCH fit:
# residual trace, ACF of residuals and squared residuals, normal Q-Q plot.
par(mfrow = c(2, 2))
res.p <- residuals(pressure.arma.garch)
ts.plot(ts(res.p, frequency = 365, start = c(2009, 1)))
acf(res.p, lag.max = 40)
acf(res.p^2, lag.max = 40)
qqnorm(res.p)
qqline(res.p)

ARMA-GARCH for modeling temperature

# We take the ARMA order from the earlier ARIMA(8,1,4) search.
# Find order for GARCH.
#
# FIX: ugarchspec's armaOrder takes only c(p, q). The original passed the
# full ARIMA triple c(8, 1, 4); rugarch would presumably read only the first
# two elements (ARMA(8,1)) and silently drop the MA order — TODO confirm
# against rugarch internals. Use c(8, 4); the d = 1 term has no slot here.
#
# Returns a one-row data frame (m, n, AIC) for a GARCH(m,n) variance model
# with an ARMA(8,4) mean (harmonic regressors) on the temperature series.
test_modelAGG <- function(m,n){
    spec = ugarchspec(variance.model=list(garchOrder=c(m,n)),
                      mean.model=list(armaOrder=c(8,4),
                                      external.regressors = as.matrix(harmonic.train),
                                      include.mean=TRUE), distribution.model="std")
    fit = ugarchfit(spec, daily.temp.train.ts, solver = 'hybrid')
    current.aic = infocriteria(fit)[1]
    df = data.frame(m,n,current.aic)
    names(df) <- c("m","n","AIC")
    return(df)
}

# Grid-search GARCH orders (m, n) over 0..2; fits that error out are skipped.
# The Inf sentinel row is dropped from consideration by the descending sort.
ordersAGG <- data.frame(m = Inf, n = Inf, AIC = Inf)

for (gm in 0:2) {
    for (gn in 0:2) {
        res <- tryCatch(test_modelAGG(gm, gn), error = function(e) e)
        if (inherits(res, "error")) next
        ordersAGG <- rbind(ordersAGG, res)
    }
}
ordersAGG <- ordersAGG[order(ordersAGG$AIC, decreasing = TRUE), ]
tail(ordersAGG)
# Update ARMA using GARCH = (2,0).
# Returns a one-row data frame (p, q, AIC) for an ARMA(p,q) mean model with
# GARCH(2,0) variance on the daily temperature training series.
test_modelAGA <- function(p,q){
    spec <- ugarchspec(variance.model = list(garchOrder = c(2, 0)),
                       mean.model = list(armaOrder = c(p, q),
                                         external.regressors = as.matrix(harmonic.train),
                                         include.mean = TRUE),
                       distribution.model = "std")
    fit <- ugarchfit(spec, daily.temp.train.ts, solver = "hybrid")
    out <- data.frame(p = p, q = q, AIC = infocriteria(fit)[1])
    out
}

# Grid-search ARMA orders (p, q) over 0..7 at the fixed GARCH order,
# skipping any combination whose fit fails.
ordersAGA <- data.frame(p = Inf, q = Inf, AIC = Inf)
for (ar.ord in 0:7) {
    for (ma.ord in 0:7) {
        res <- tryCatch(test_modelAGA(ar.ord, ma.ord), error = function(e) e)
        if (inherits(res, "error")) next
        ordersAGA <- rbind(ordersAGA, res)
    }
}
ordersAGA <- ordersAGA[order(ordersAGA$AIC, decreasing = TRUE), ]
tail(ordersAGA)
# We take order of ARMA equal to (6,5).
# Find order for GARCH: one-row data frame (m, n, AIC) for a GARCH(m,n)
# variance model with a fixed ARMA(6,5) mean on the temperature series.
test_modelAGG <- function(m,n){
    spec <- ugarchspec(variance.model = list(garchOrder = c(m, n)),
                       mean.model = list(armaOrder = c(6, 5),
                                         external.regressors = as.matrix(harmonic.train),
                                         include.mean = TRUE),
                       distribution.model = "std")
    fit <- ugarchfit(spec, daily.temp.train.ts, solver = "hybrid")
    out <- data.frame(m = m, n = n, AIC = infocriteria(fit)[1])
    out
}

# Re-run the GARCH (m, n) grid at the updated ARMA(6,5) mean order.
ordersAGG <- data.frame(m = Inf, n = Inf, AIC = Inf)

for (gm in 0:2) {
    for (gn in 0:2) {
        res <- tryCatch(test_modelAGG(gm, gn), error = function(e) e)
        if (inherits(res, "error")) next
        ordersAGG <- rbind(ordersAGG, res)
    }
}
ordersAGG <- ordersAGG[order(ordersAGG$AIC, decreasing = TRUE), ]
tail(ordersAGG)
# Update ARMA using GARCH = (1,1).
# Returns a one-row data frame (p, q, AIC) for an ARMA(p,q) mean model with
# GARCH(1,1) variance on the daily temperature training series.
test_modelAGA <- function(p,q){
    spec <- ugarchspec(variance.model = list(garchOrder = c(1, 1)),
                       mean.model = list(armaOrder = c(p, q),
                                         external.regressors = as.matrix(harmonic.train),
                                         include.mean = TRUE),
                       distribution.model = "std")
    fit <- ugarchfit(spec, daily.temp.train.ts, solver = "hybrid")
    out <- data.frame(p = p, q = q, AIC = infocriteria(fit)[1])
    out
}

# Re-run the ARMA (p, q) grid at the updated GARCH(1,1) variance order.
ordersAGA <- data.frame(p = Inf, q = Inf, AIC = Inf)
for (ar.ord in 0:7) {
    for (ma.ord in 0:7) {
        res <- tryCatch(test_modelAGA(ar.ord, ma.ord), error = function(e) e)
        if (inherits(res, "error")) next
        ordersAGA <- rbind(ordersAGA, res)
    }
}
ordersAGA <- ordersAGA[order(ordersAGA$AIC, decreasing = TRUE), ]
tail(ordersAGA)

The final iteration shows that the optimal orders for modeling the temperature are (6,5)x(1,1).

# Final model for daily temperature: ARMA(6,5) mean with harmonic
# regressors, sGARCH(1,1) variance, Student-t innovations.
spec.2 <- ugarchspec(variance.model = list(garchOrder = c(1, 1)),
                     mean.model = list(armaOrder = c(6, 5),
                                       external.regressors = as.matrix(harmonic.train),
                                       include.mean = TRUE),
                     distribution.model = "std")
temp.arma.garch <- ugarchfit(spec.2, daily.temp.train.ts, solver = "hybrid")
temp.arma.garch
## 
## *---------------------------------*
## *          GARCH Model Fit        *
## *---------------------------------*
## 
## Conditional Variance Dynamics    
## -----------------------------------
## GARCH Model  : sGARCH(1,1)
## Mean Model   : ARFIMA(6,0,5)
## Distribution : std 
## 
## Optimal Parameters
## ------------------------------------
##          Estimate  Std. Error   t value Pr(>|t|)
## mu     271.263886    1.427893 189.97493 0.000000
## ar1      0.569273    0.033366  17.06132 0.000000
## ar2     -0.021357    0.072262  -0.29555 0.767570
## ar3      0.700076    0.065037  10.76426 0.000000
## ar4      0.097627    0.052148   1.87211 0.061192
## ar5     -0.468455    0.072153  -6.49249 0.000000
## ar6      0.120216    0.013258   9.06764 0.000000
## ma1      0.434493    0.038666  11.23705 0.000000
## ma2      0.230926    0.033817   6.82871 0.000000
## ma3     -0.522068    0.037897 -13.77608 0.000000
## ma4     -0.659268    0.017992 -36.64278 0.000000
## ma5     -0.031916    0.018895  -1.68912 0.091196
## mxreg1  -1.278284    0.677050  -1.88802 0.059023
## mxreg2  -1.251881    0.794864  -1.57496 0.115265
## omega    0.951081    0.250829   3.79175 0.000150
## alpha1   0.107496    0.020741   5.18288 0.000000
## beta1    0.739703    0.053418  13.84742 0.000000
## shape   21.331965    7.005240   3.04514 0.002326
## 
## Robust Standard Errors:
##          Estimate  Std. Error   t value Pr(>|t|)
## mu     271.263886    2.114332 128.29767 0.000000
## ar1      0.569273    0.042284  13.46296 0.000000
## ar2     -0.021357    0.068728  -0.31075 0.755987
## ar3      0.700076    0.089535   7.81905 0.000000
## ar4      0.097627    0.066408   1.47010 0.141535
## ar5     -0.468455    0.074539  -6.28471 0.000000
## ar6      0.120216    0.057499   2.09075 0.036551
## ma1      0.434493    0.046062   9.43280 0.000000
## ma2      0.230926    0.049612   4.65461 0.000003
## ma3     -0.522068    0.072440  -7.20687 0.000000
## ma4     -0.659268    0.060327 -10.92817 0.000000
## ma5     -0.031916    0.044842  -0.71174 0.476627
## mxreg1  -1.278284    0.830991  -1.53826 0.123984
## mxreg2  -1.251881    1.228604  -1.01895 0.308229
## omega    0.951081    0.269001   3.53560 0.000407
## alpha1   0.107496    0.021602   4.97617 0.000001
## beta1    0.739703    0.057360  12.89573 0.000000
## shape   21.331965    8.097314   2.63445 0.008427
## 
## LogLikelihood : -6663.193 
## 
## Information Criteria
## ------------------------------------
##                    
## Akaike       4.6413
## Bayes        4.6786
## Shibata      4.6413
## Hannan-Quinn 4.6548
## 
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
##                          statistic p-value
## Lag[1]                     0.07248  0.7878
## Lag[2*(p+q)+(p+q)-1][32]  14.17249  1.0000
## Lag[4*(p+q)+(p+q)-1][54]  30.74302  0.1969
## d.o.f=11
## H0 : No serial correlation
## 
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
##                         statistic p-value
## Lag[1]                      0.641  0.4233
## Lag[2*(p+q)+(p+q)-1][5]     4.702  0.1786
## Lag[4*(p+q)+(p+q)-1][9]     6.769  0.2193
## d.o.f=2
## 
## Weighted ARCH LM Tests
## ------------------------------------
##             Statistic Shape Scale P-Value
## ARCH Lag[3]     2.001 0.500 2.000  0.1572
## ARCH Lag[5]     2.900 1.440 1.667  0.3047
## ARCH Lag[7]     4.291 2.315 1.543  0.3066
## 
## Nyblom stability test
## ------------------------------------
## Joint Statistic:  3.1633
## Individual Statistics:               
## mu     0.004208
## ar1    0.118600
## ar2    0.048165
## ar3    0.027247
## ar4    0.098318
## ar5    0.101603
## ar6    0.026675
## ma1    0.521380
## ma2    0.399189
## ma3    1.065975
## ma4    0.057628
## ma5    1.385689
## mxreg1 0.033016
## mxreg2 0.078751
## omega  0.234777
## alpha1 0.189449
## beta1  0.218194
## shape  0.063735
## 
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic:          3.83 4.14 4.73
## Individual Statistic:     0.35 0.47 0.75
## 
## Sign Bias Test
## ------------------------------------
##                    t-value   prob sig
## Sign Bias          0.97856 0.3279    
## Negative Sign Bias 0.09711 0.9226    
## Positive Sign Bias 0.11298 0.9101    
## Joint Effect       2.43540 0.4871    
## 
## 
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
##   group statistic p-value(g-1)
## 1    20     24.79       0.1675
## 2    30     33.29       0.2661
## 3    40     34.44       0.6777
## 4    50     46.72       0.5660
## 
## 
## Elapsed time : 1.199471
# Residual diagnostics for the temperature ARMA-GARCH fit:
# residual trace, ACF of residuals and squared residuals, normal Q-Q plot.
par(mfrow = c(2, 2))
res.t <- residuals(temp.arma.garch)
ts.plot(ts(res.t, frequency = 365, start = c(2009, 1)))
acf(res.t, lag.max = 40)
acf(res.t^2, lag.max = 40)
qqnorm(res.t)
qqline(res.t)

ARMA-GARCH for modeling humidity

# We take order of ARMA equal to (6,0,5), i.e. ARMA(6,5).
# Find order for GARCH: one-row data frame (m, n, AIC) for a GARCH(m,n)
# variance model with an ARMA(6,5) mean on the daily humidity series.
test_modelAGG <- function(m,n){
    spec <- ugarchspec(variance.model = list(garchOrder = c(m, n)),
                       mean.model = list(armaOrder = c(6, 5),
                                         external.regressors = as.matrix(harmonic.train),
                                         include.mean = TRUE),
                       distribution.model = "std")
    fit <- ugarchfit(spec, daily.humidity.train.ts, solver = "hybrid")
    out <- data.frame(m = m, n = n, AIC = infocriteria(fit)[1])
    out
}

# Grid-search GARCH orders (m, n) over 0..2 for the humidity model,
# skipping fits that fail.
ordersAGG <- data.frame(m = Inf, n = Inf, AIC = Inf)

for (gm in 0:2) {
    for (gn in 0:2) {
        res <- tryCatch(test_modelAGG(gm, gn), error = function(e) e)
        if (inherits(res, "error")) next
        ordersAGG <- rbind(ordersAGG, res)
    }
}
ordersAGG <- ordersAGG[order(ordersAGG$AIC, decreasing = TRUE), ]
tail(ordersAGG)
# Update ARMA using GARCH = (2,2).
# Returns a one-row data frame (p, q, AIC) for an ARMA(p,q) mean model with
# GARCH(2,2) variance on the daily humidity training series.
test_modelAGA <- function(p,q){
    spec <- ugarchspec(variance.model = list(garchOrder = c(2, 2)),
                       mean.model = list(armaOrder = c(p, q),
                                         external.regressors = as.matrix(harmonic.train),
                                         include.mean = TRUE),
                       distribution.model = "std")
    fit <- ugarchfit(spec, daily.humidity.train.ts, solver = "hybrid")
    out <- data.frame(p = p, q = q, AIC = infocriteria(fit)[1])
    out
}

# Grid-search ARMA orders (p, q) over 0..7 at the fixed GARCH(2,2) order,
# skipping any combination whose fit fails.
ordersAGA <- data.frame(p = Inf, q = Inf, AIC = Inf)
for (ar.ord in 0:7) {
    for (ma.ord in 0:7) {
        res <- tryCatch(test_modelAGA(ar.ord, ma.ord), error = function(e) e)
        if (inherits(res, "error")) next
        ordersAGA <- rbind(ordersAGA, res)
    }
}
ordersAGA <- ordersAGA[order(ordersAGA$AIC, decreasing = TRUE), ]
tail(ordersAGA)
# Update order for GARCH using ARMA (4,4).
# Returns a one-row data frame (m, n, AIC) for a GARCH(m,n) variance model
# with a fixed ARMA(4,4) mean on the daily humidity training series.
test_modelAGG <- function(m,n){
    spec <- ugarchspec(variance.model = list(garchOrder = c(m, n)),
                       mean.model = list(armaOrder = c(4, 4),
                                         external.regressors = as.matrix(harmonic.train),
                                         include.mean = TRUE),
                       distribution.model = "std")
    fit <- ugarchfit(spec, daily.humidity.train.ts, solver = "hybrid")
    out <- data.frame(m = m, n = n, AIC = infocriteria(fit)[1])
    out
}

# Re-run the GARCH (m, n) grid at the updated ARMA(4,4) mean order.
ordersAGG <- data.frame(m = Inf, n = Inf, AIC = Inf)

for (gm in 0:2) {
    for (gn in 0:2) {
        res <- tryCatch(test_modelAGG(gm, gn), error = function(e) e)
        if (inherits(res, "error")) next
        ordersAGG <- rbind(ordersAGG, res)
    }
}
ordersAGG <- ordersAGG[order(ordersAGG$AIC, decreasing = TRUE), ]
tail(ordersAGG)

The final iteration shows that the optimal orders for modeling the humidity are (4,4)x(2,2).

# Final model for daily humidity: ARMA(4,4) mean with harmonic regressors,
# sGARCH(2,2) variance, Student-t innovations.
spec.3 <- ugarchspec(variance.model = list(garchOrder = c(2, 2)),
                     mean.model = list(armaOrder = c(4, 4),
                                       external.regressors = as.matrix(harmonic.train),
                                       include.mean = TRUE),
                     distribution.model = "std")
hum.arma.garch <- ugarchfit(spec.3, daily.humidity.train.ts, solver = "hybrid")
hum.arma.garch
## 
## *---------------------------------*
## *          GARCH Model Fit        *
## *---------------------------------*
## 
## Conditional Variance Dynamics    
## -----------------------------------
## GARCH Model  : sGARCH(2,2)
## Mean Model   : ARFIMA(4,0,4)
## Distribution : std 
## 
## Optimal Parameters
## ------------------------------------
##         Estimate  Std. Error     t value Pr(>|t|)
## mu      3.500533    0.329996    10.60780 0.000000
## ar1     0.308159    0.018164    16.96554 0.000000
## ar2     0.537184    0.013391    40.11657 0.000000
## ar3     0.747056    0.011639    64.18771 0.000000
## ar4    -0.598562    0.018233   -32.82787 0.000000
## ma1     0.661218    0.009522    69.43926 0.000000
## ma2    -0.130586    0.003378   -38.66121 0.000000
## ma3    -0.973952    0.000099 -9821.81974 0.000000
## ma4    -0.251958    0.010655   -23.64791 0.000000
## mxreg1 -0.742382    0.200171    -3.70874 0.000208
## mxreg2 -0.752662    0.187394    -4.01647 0.000059
## omega   0.020951    0.007805     2.68427 0.007269
## alpha1  0.092844    0.016580     5.59990 0.000000
## alpha2  0.026643    0.017224     1.54688 0.121892
## beta1   0.027768    0.051735     0.53673 0.591456
## beta2   0.832322    0.047072    17.68187 0.000000
## shape   8.794568    1.477189     5.95359 0.000000
## 
## Robust Standard Errors:
##         Estimate  Std. Error     t value Pr(>|t|)
## mu      3.500533    0.355593  9.8442e+00 0.000000
## ar1     0.308159    0.018952  1.6260e+01 0.000000
## ar2     0.537184    0.012301  4.3671e+01 0.000000
## ar3     0.747056    0.010426  7.1656e+01 0.000000
## ar4    -0.598562    0.020013 -2.9909e+01 0.000000
## ma1     0.661218    0.009253  7.1461e+01 0.000000
## ma2    -0.130586    0.003134 -4.1661e+01 0.000000
## ma3    -0.973952    0.000085 -1.1491e+04 0.000000
## ma4    -0.251958    0.011229 -2.2438e+01 0.000000
## mxreg1 -0.742382    0.204004 -3.6391e+00 0.000274
## mxreg2 -0.752662    0.208267 -3.6139e+00 0.000302
## omega   0.020951    0.008376  2.5013e+00 0.012374
## alpha1  0.092844    0.016135  5.7544e+00 0.000000
## alpha2  0.026643    0.016852  1.5810e+00 0.113886
## beta1   0.027768    0.039358  7.0552e-01 0.480486
## beta2   0.832322    0.036042  2.3093e+01 0.000000
## shape   8.794568    1.239269  7.0966e+00 0.000000
## 
## LogLikelihood : -3693.831 
## 
## Information Criteria
## ------------------------------------
##                    
## Akaike       2.5779
## Bayes        2.6131
## Shibata      2.5778
## Hannan-Quinn 2.5906
## 
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
##                          statistic p-value
## Lag[1]                   9.406e-04  0.9755
## Lag[2*(p+q)+(p+q)-1][23] 9.075e+00  1.0000
## Lag[4*(p+q)+(p+q)-1][39] 1.820e+01  0.6924
## d.o.f=8
## H0 : No serial correlation
## 
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
##                          statistic p-value
## Lag[1]                       1.042  0.3074
## Lag[2*(p+q)+(p+q)-1][11]     5.425  0.5171
## Lag[4*(p+q)+(p+q)-1][19]     9.345  0.5226
## d.o.f=4
## 
## Weighted ARCH LM Tests
## ------------------------------------
##             Statistic Shape Scale P-Value
## ARCH Lag[5]    0.8100 0.500 2.000  0.3681
## ARCH Lag[7]    0.9807 1.473 1.746  0.7628
## ARCH Lag[9]    1.3682 2.402 1.619  0.8748
## 
## Nyblom stability test
## ------------------------------------
## Joint Statistic:  1.4089
## Individual Statistics:              
## mu     0.06731
## ar1    0.04072
## ar2    0.03630
## ar3    0.02752
## ar4    0.05690
## ma1    0.03421
## ma2    0.02551
## ma3    0.03256
## ma4    0.02783
## mxreg1 0.07684
## mxreg2 0.05602
## omega  0.05091
## alpha1 0.03350
## alpha2 0.05311
## beta1  0.03857
## beta2  0.03708
## shape  0.14983
## 
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic:          3.64 3.95 4.51
## Individual Statistic:     0.35 0.47 0.75
## 
## Sign Bias Test
## ------------------------------------
##                    t-value      prob sig
## Sign Bias            2.143 3.223e-02  **
## Negative Sign Bias   0.906 3.650e-01    
## Positive Sign Bias   1.221 2.221e-01    
## Joint Effect        27.469 4.694e-06 ***
## 
## 
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
##   group statistic p-value(g-1)
## 1    20     37.84     0.006221
## 2    30     57.51     0.001248
## 3    40     63.34     0.008155
## 4    50     74.68     0.010495
## 
## 
## Elapsed time : 0.8980579
# Residual diagnostics for the humidity ARMA-GARCH fit:
# residual trace, ACF of residuals and squared residuals, normal Q-Q plot.
par(mfrow = c(2, 2))
res.h <- residuals(hum.arma.garch)
ts.plot(ts(res.h, frequency = 365, start = c(2009, 1)))
acf(res.h, lag.max = 40)
acf(res.h^2, lag.max = 40)
qqnorm(res.h)
qqline(res.h)

ARMA-GARCH Prediction

# Rolling one-week-ahead ARMA-GARCH forecasts over the 6-week test period.
# Each iteration refits on the training data plus all fully observed test
# weeks, then forecasts the next 7 days with a 95% interval.

# Fit an ARMA-GARCH with harmonic regressors in the mean and forecast 7 days.
#   series      numeric series to model
#   arma.order  c(p, q) for the mean model
#   garch.order c(m, n) for the variance model
#   exo.fit     harmonic regressor rows matching `series`
#   exo.fore    harmonic regressor rows for the 7-day forecast horizon
# Returns a 7-row data frame with columns prediction / lower / upper.
forecast.week <- function(series, arma.order, garch.order, exo.fit, exo.fore) {
  spec <- ugarchspec(variance.model = list(garchOrder = garch.order),
                     mean.model = list(armaOrder = arma.order,
                                       external.regressors = as.matrix(exo.fit),
                                       include.mean = TRUE),
                     distribution.model = "std")
  fit <- ugarchfit(spec, series, solver = 'hybrid')
  # FIX: the mean model contains external regressors, so their values over
  # the forecast horizon must be supplied via external.forecasts (this
  # mirrors the `dumvar` argument passed to predict() in the VAR section).
  # The original call omitted them.
  fore <- ugarchforecast(fit, n.ahead = 7,
                         external.forecasts = list(mregfor = as.matrix(exo.fore)))
  fore <- fore@forecast
  out <- data.frame(fore$seriesFor,
                    fore$seriesFor - 1.96 * fore$sigmaFor,
                    fore$seriesFor + 1.96 * fore$sigmaFor)
  names(out) <- c('prediction', 'lower', 'upper')
  out
}

# Sentinel rows (dropped below) so rbind() has a target with the right names.
prediction.p <- data.frame(Inf, Inf, Inf)
names(prediction.p) <- c('prediction', 'lower', 'upper')
prediction.t <- data.frame(Inf, Inf, Inf)
names(prediction.t) <- c('prediction', 'lower', 'upper')
prediction.h <- data.frame(Inf, Inf, Inf)
names(prediction.h) <- c('prediction', 'lower', 'upper')

for (i in 1:6) {
  # Training window: all training data plus the (i - 1) already-seen test weeks.
  data <- daily.training.data
  if (i >= 2) {
    data <- rbind(data, daily.testing.data[1:((i - 1) * 7), ])
  }
  exo.fit <- harmonic.all[1:nrow(data), ]
  exo.fore <- harmonic.test[(1 + (i - 1) * 7):(i * 7), ]

  # Orders per series were selected by the grid searches above:
  # pressure (6,5)x(1,1), temperature (6,5)x(1,1), humidity (4,4)x(2,2).
  prediction.p <- rbind(prediction.p,
                        forecast.week(data$pressure, c(6, 5), c(1, 1), exo.fit, exo.fore))
  prediction.t <- rbind(prediction.t,
                        forecast.week(data$temperature, c(6, 5), c(1, 1), exo.fit, exo.fore))
  prediction.h <- rbind(prediction.h,
                        forecast.week(data$humidity, c(4, 4), c(2, 2), exo.fit, exo.fore))
}

# Drop the sentinel rows and attach the test dates.
prediction.p <- prediction.p[-1, ]
prediction.p$date <- daily.testing.data$date

prediction.t <- prediction.t[-1, ]
prediction.t$date <- daily.testing.data$date

prediction.h <- prediction.h[-1, ]
prediction.h$date <- daily.testing.data$date

arma.garch.prediction <- list(prediction.p, prediction.t, prediction.h)
save(arma.garch.prediction, file = 'arma-garch-prediction.RData')

Prediction Plots

load('arma-garch-prediction.RData')
prediction.p <- arma.garch.prediction[[1]]
prediction.t <- arma.garch.prediction[[2]]
prediction.h <- arma.garch.prediction[[3]]

# Draw the observed series (black) and overlay the forecast (red) plus the
# 95% interval bounds (blue) on identical axes via par(new = TRUE).
draw_forecast <- function(observed, pred, ylim, main, ylab) {
  dates <- daily.testing.data$date
  plot(dates, observed, type = 'l', ylim = ylim, main = main, ylab = ylab, xlab = 'Date')
  for (layer in list(list(y = pred$prediction, col = 'red'),
                     list(y = pred$lower, col = 'blue'),
                     list(y = pred$upper, col = 'blue'))) {
    par(new = TRUE)
    plot(dates, layer$y, type = 'l', col = layer$col, ylim = ylim,
         ylab = '', cex = 0.6, xlab = 'Date')
  }
}

draw_forecast(daily.testing.data$pressure, prediction.p, c(970, 1020),
              'Pressure Prediction', 'Pressure')

draw_forecast(daily.testing.data$temperature, prediction.t, c(264, 289),
              'Temperature Prediction', 'Temperature')

draw_forecast(daily.testing.data$humidity, prediction.h, c(0.5, 8),
              'Humidity Prediction', 'Humidity')

# VAR modeling

# Assemble the multivariate train/test series for VAR modeling.
train.data <- cbind(daily.pressure.train.ts, daily.temp.train.ts, daily.humidity.train.ts)
test.data <- cbind(daily.pressure.test.ts, daily.temp.test.ts, daily.humidity.test.ts)
colnames(train.data) <- c("pressure", "temperature", "humidity")
# FIX: name the test columns to match; the rolling-forecast loop rbind()s
# test.data onto train.data and previously relied on positional matching
# while the column names disagreed.
colnames(test.data) <- c("pressure", "temperature", "humidity")
# Lag-order selection (AIC / HQ / SC / FPE) with harmonic exogenous terms.
var.select <- VARselect(train.data, exogen = harmonic.train, type = "both")
var.select$selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      4      3      3      4
# Fit a VAR(3) with constant + trend and harmonic exogenous regressors.
var1 <- VAR(train.data, p = 3, exogen = harmonic.train, type = "both")
summary(var1)
## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: pressure, temperature, humidity 
## Deterministic variables: both 
## Sample size: 2876 
## Log Likelihood: -17670.276 
## Roots of the characteristic polynomial:
## 0.8093 0.7514 0.667 0.4092 0.4092 0.3992 0.3992 0.2819 0.2819
## Call:
## VAR(y = train.data, p = 3, type = "both", exogen = harmonic.train)
## 
## 
## Estimation results for equation pressure: 
## ========================================= 
## pressure = pressure.l1 + temperature.l1 + humidity.l1 + pressure.l2 + temperature.l2 + humidity.l2 + pressure.l3 + temperature.l3 + humidity.l3 + const + trend + exo1 + exo2 
## 
##                  Estimate Std. Error t value Pr(>|t|)    
## pressure.l1     1.054e+00  2.058e-02  51.218  < 2e-16 ***
## temperature.l1 -1.804e-01  4.883e-02  -3.694 0.000225 ***
## humidity.l1     8.324e-01  1.272e-01   6.542 7.16e-11 ***
## pressure.l2    -4.156e-01  2.948e-02 -14.097  < 2e-16 ***
## temperature.l2  5.542e-02  6.278e-02   0.883 0.377425    
## humidity.l2    -5.627e-01  1.599e-01  -3.518 0.000441 ***
## pressure.l3     1.391e-01  2.059e-02   6.755 1.72e-11 ***
## temperature.l3  5.033e-02  4.924e-02   1.022 0.306826    
## humidity.l3     5.591e-02  1.272e-01   0.439 0.660402    
## const           2.391e+02  1.746e+01  13.692  < 2e-16 ***
## trend           1.409e-04  1.058e-04   1.331 0.183196    
## exo1            1.458e-01  1.879e-01   0.776 0.437845    
## exo2            1.302e-02  2.184e-01   0.060 0.952466    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 4.574 on 2863 degrees of freedom
## Multiple R-Squared: 0.6748,  Adjusted R-squared: 0.6734 
## F-statistic: 495.1 on 12 and 2863 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation temperature: 
## ============================================ 
## temperature = pressure.l1 + temperature.l1 + humidity.l1 + pressure.l2 + temperature.l2 + humidity.l2 + pressure.l3 + temperature.l3 + humidity.l3 + const + trend + exo1 + exo2 
## 
##                  Estimate Std. Error t value Pr(>|t|)    
## pressure.l1     0.0417598  0.0108674   3.843 0.000124 ***
## temperature.l1  0.9599952  0.0257901  37.223  < 2e-16 ***
## humidity.l1     0.0066496  0.0671988   0.099 0.921182    
## pressure.l2     0.0358979  0.0155684   2.306 0.021191 *  
## temperature.l2 -0.1480294  0.0331556  -4.465 8.33e-06 ***
## humidity.l2    -0.1183646  0.0844662  -1.401 0.161225    
## pressure.l3    -0.0416479  0.0108743  -3.830 0.000131 ***
## temperature.l3  0.0555683  0.0260067   2.137 0.032708 *  
## humidity.l3    -0.0951040  0.0672013  -1.415 0.157115    
## const           2.9812421  9.2208031   0.323 0.746479    
## trend           0.0001385  0.0000559   2.478 0.013269 *  
## exo1           -0.9534103  0.0992193  -9.609  < 2e-16 ***
## exo2           -1.5338899  0.1153211 -13.301  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 2.416 on 2863 degrees of freedom
## Multiple R-Squared: 0.9062,  Adjusted R-squared: 0.9058 
## F-statistic:  2306 on 12 and 2863 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation humidity: 
## ========================================= 
## humidity = pressure.l1 + temperature.l1 + humidity.l1 + pressure.l2 + temperature.l2 + humidity.l2 + pressure.l3 + temperature.l3 + humidity.l3 + const + trend + exo1 + exo2 
## 
##                  Estimate Std. Error t value Pr(>|t|)    
## pressure.l1     1.826e-02  3.992e-03   4.575 4.96e-06 ***
## temperature.l1  9.198e-02  9.474e-03   9.709  < 2e-16 ***
## humidity.l1     7.787e-01  2.468e-02  31.544  < 2e-16 ***
## pressure.l2     2.978e-04  5.719e-03   0.052 0.958481    
## temperature.l2 -3.556e-03  1.218e-02  -0.292 0.770309    
## humidity.l2    -2.635e-01  3.103e-02  -8.492  < 2e-16 ***
## pressure.l3    -8.311e-03  3.995e-03  -2.081 0.037565 *  
## temperature.l3 -3.575e-02  9.553e-03  -3.742 0.000186 ***
## humidity.l3     1.205e-01  2.469e-02   4.883 1.10e-06 ***
## const          -2.293e+01  3.387e+00  -6.771 1.55e-11 ***
## trend           3.695e-05  2.053e-05   1.799 0.072077 .  
## exo1           -4.314e-01  3.645e-02 -11.835  < 2e-16 ***
## exo2           -4.130e-01  4.236e-02  -9.750  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.8874 on 2863 degrees of freedom
## Multiple R-Squared: 0.8826,  Adjusted R-squared: 0.8821 
## F-statistic:  1794 on 12 and 2863 DF,  p-value: < 2.2e-16 
## 
## 
## 
## Covariance matrix of residuals:
##             pressure temperature humidity
## pressure      20.923      -4.692  -1.3364
## temperature   -4.692       5.835   1.4163
## humidity      -1.336       1.416   0.7874
## 
## Correlation matrix of residuals:
##             pressure temperature humidity
## pressure      1.0000     -0.4246  -0.3293
## temperature  -0.4246      1.0000   0.6607
## humidity     -0.3293      0.6607   1.0000
# Rolling one-week-ahead VAR forecasts over the 6-week test period.
# Sentinel rows (dropped below) give rbind() a target with the right names.
prediction.p <- data.frame(fcst = Inf, lower = Inf, upper = Inf)
prediction.t <- data.frame(fcst = Inf, lower = Inf, upper = Inf)
prediction.h <- data.frame(fcst = Inf, lower = Inf, upper = Inf)

for (i in 1:6) {
  # Observation window: training data plus the (i - 1) already-seen test weeks.
  data <- train.data
  if (i >= 2) {
    data <- rbind(data, test.data[1:((i - 1) * 7), ])
  }
  exo <- harmonic.all[1:nrow(data), ]
  new.exo <- harmonic.test[(1 + (i - 1) * 7):(i * 7), ]

  # Refit the VAR(3) and forecast the next 7 days with a 95% interval,
  # supplying the exogenous regressors for the horizon via dumvar.
  varx <- VAR(data, p = 3, exogen = exo, type = "both")
  fc <- predict(varx, n.ahead = 7, ci = 0.95, dumvar = new.exo)$fcst
  prediction.p <- rbind(prediction.p, fc$pressure[, 1:3])
  prediction.t <- rbind(prediction.t, fc$temperature[, 1:3])
  prediction.h <- rbind(prediction.h, fc$humidity[, 1:3])
}

# Drop the sentinel rows and attach the test dates.
prediction.p <- prediction.p[-1, ]
prediction.p$date <- daily.testing.data$date

prediction.t <- prediction.t[-1, ]
prediction.t$date <- daily.testing.data$date

prediction.h <- prediction.h[-1, ]
prediction.h$date <- daily.testing.data$date

varx.prediction <- list(prediction.p, prediction.t, prediction.h)
save(varx.prediction, file = 'varx-prediction.RData')

Prediction Plots

load('varx-prediction.RData')
prediction.p <- varx.prediction[[1]]
prediction.t <- varx.prediction[[2]]
prediction.h <- varx.prediction[[3]]

# Overlay observed series (black), point forecast (red) and 95% prediction
# bounds (blue) on a shared y-range for one weather component.
plot.with.bounds <- function(dates, observed, pred, ylim, main, ylab) {
  plot(dates, observed, type = 'l', ylim = ylim, main = main,
       ylab = ylab, xlab = 'Date')
  for (series in list(list(y = pred$fcst, col = 'red'),
                      list(y = pred$lower, col = 'blue'),
                      list(y = pred$upper, col = 'blue'))) {
    par(new = TRUE)  # overplot on the same axes
    plot(dates, series$y, type = 'l', col = series$col, ylim = ylim,
         ylab = '', cex = 0.6, xlab = 'Date')
  }
}

plot.with.bounds(daily.testing.data$date, daily.testing.data$pressure,
                 prediction.p, c(970, 1020), 'Pressure Prediction', 'Pressure')

plot.with.bounds(daily.testing.data$date, daily.testing.data$temperature,
                 prediction.t, c(264, 289), 'Temperature Prediction', 'Temperature')

plot.with.bounds(daily.testing.data$date, daily.testing.data$humidity,
                 prediction.h, c(0.5, 8), 'Humidity Prediction', 'Humidity')

Results

Looking at the roots of the characteristic polynomial, we see that their absolute values are all less than 1, which indicates that our VAR model is stable.

Looking at the output of the models for all 3 weather components we conclude the following: the trend term is not statistically significant for any of the 3 weather components; the harmonic seasonality coefficients are not statistically significant in the first model (pressure), but they are statistically significant in the temperature and humidity models. For pressure: there is a lead-lag relationship with temperature at lags 1, 2 and 3, and with humidity only at lag 3; the series' own lags 1, 2 and 3 are statistically significant. For temperature: there is a lead-lag relationship with pressure at lags 1, 2 and 3, and with humidity only at lag 3; the series' own lags 1, 2 and 3 are statistically significant. For humidity: there is a lead-lag relationship with temperature at lags 1 and 3, and with pressure at lags 1 and 3; the series' own lags 1, 2 and 3 are statistically significant.

# Drop statistically insignificant regressors from the fitted VAR
# (restrict() re-estimates each equation keeping only |t| >= 2 terms).
var.restrict.model <- restrict(var1)
summary(var.restrict.model)
## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: pressure, temperature, humidity 
## Deterministic variables: both 
## Sample size: 2876 
## Log Likelihood: -17679.725 
## Roots of the characteristic polynomial:
## 0.793 0.7476 0.6734 0.4205 0.4205 0.3401 0.3401 0.2352 0.2352
## Call:
## VAR(y = train.data, p = 3, type = "both", exogen = harmonic.train)
## 
## 
## Estimation results for equation pressure: 
## ========================================= 
## pressure = pressure.l1 + temperature.l1 + humidity.l1 + pressure.l2 + humidity.l2 + pressure.l3 + temperature.l3 + const 
## 
##                 Estimate Std. Error t value Pr(>|t|)    
## pressure.l1      1.05763    0.01981  53.390  < 2e-16 ***
## temperature.l1  -0.14968    0.03557  -4.208 2.66e-05 ***
## humidity.l1      0.78140    0.12026   6.498 9.58e-11 ***
## pressure.l2     -0.42371    0.02688 -15.763  < 2e-16 ***
## humidity.l2     -0.49051    0.11311  -4.337 1.50e-05 ***
## pressure.l3      0.14443    0.01940   7.445 1.27e-13 ***
## temperature.l3   0.08022    0.03088   2.598  0.00943 ** 
## const          237.15317   15.34800  15.452  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 4.573 on 2868 degrees of freedom
## Multiple R-Squared:     1,   Adjusted R-squared:     1 
## F-statistic: 1.682e+07 on 8 and 2868 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation temperature: 
## ============================================ 
## temperature = pressure.l1 + temperature.l1 + pressure.l2 + temperature.l2 + humidity.l2 + pressure.l3 + trend + exo1 + exo2 
## 
##                  Estimate Std. Error t value Pr(>|t|)    
## pressure.l1     4.585e-02  1.013e-02   4.526 6.25e-06 ***
## temperature.l1  9.613e-01  2.013e-02  47.763  < 2e-16 ***
## pressure.l2     3.922e-02  1.475e-02   2.659  0.00788 ** 
## temperature.l2 -9.689e-02  2.240e-02  -4.326 1.57e-05 ***
## humidity.l2    -1.919e-01  4.392e-02  -4.370 1.29e-05 ***
## pressure.l3    -4.524e-02  9.615e-03  -4.705 2.66e-06 ***
## trend           1.354e-04  5.517e-05   2.454  0.01420 *  
## exo1           -9.405e-01  9.209e-02 -10.213  < 2e-16 ***
## exo2           -1.523e+00  1.025e-01 -14.868  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 2.416 on 2867 degrees of freedom
## Multiple R-Squared: 0.9999,  Adjusted R-squared: 0.9999 
## F-statistic: 4.406e+06 on 9 and 2867 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation humidity: 
## ========================================= 
## humidity = pressure.l1 + temperature.l1 + humidity.l1 + humidity.l2 + pressure.l3 + temperature.l3 + humidity.l3 + const + exo1 + exo2 
## 
##                  Estimate Std. Error t value Pr(>|t|)    
## pressure.l1      0.018813   0.002659   7.076 1.86e-12 ***
## temperature.l1   0.091166   0.007423  12.281  < 2e-16 ***
## humidity.l1      0.781483   0.023442  33.337  < 2e-16 ***
## humidity.l2     -0.270103   0.024509 -11.021  < 2e-16 ***
## pressure.l3     -0.008014   0.002693  -2.976  0.00294 ** 
## temperature.l3  -0.036781   0.007514  -4.895 1.04e-06 ***
## humidity.l3      0.124404   0.022485   5.533 3.44e-08 ***
## const          -23.907760   3.341197  -7.155 1.05e-12 ***
## exo1            -0.425631   0.036282 -11.731  < 2e-16 ***
## exo2            -0.399803   0.041724  -9.582  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.8874 on 2866 degrees of freedom
## Multiple R-Squared: 0.9819,  Adjusted R-squared: 0.9818 
## F-statistic: 1.555e+04 on 10 and 2866 DF,  p-value: < 2.2e-16 
## 
## 
## 
## Covariance matrix of residuals:
##             pressure temperature humidity
## pressure      20.948      -4.698  -1.3334
## temperature   -4.698       5.847   1.4167
## humidity      -1.333       1.417   0.7883
## 
## Correlation matrix of residuals:
##             pressure temperature humidity
## pressure      1.0000     -0.4245  -0.3281
## temperature  -0.4245      1.0000   0.6599
## humidity     -0.3281      0.6599   1.0000

Forecasting comparison

# Distinct colors for the four competing models (drop the 4th palette entry).
col <- wes_palette(n = 5, name = 'FantasticFox1')
col <- col[-4]

model.names <- c('Nonparametric Trend + ANOVA Seasonality',
                 'ARMA-GARCH', 'VARX', 'ARMAX')

# Overlay the observed test series (black) with each model's point forecast,
# one color per model, plus a shared legend.
plot.model.comparison <- function(dates, observed, preds, ylim, main, ylab) {
  plot(dates, observed, type = 'l', ylim = ylim, main = main,
       ylab = ylab, xlab = 'Date')
  for (j in seq_along(preds)) {
    par(new = TRUE)  # overplot on the same axes
    plot(dates, preds[[j]], type = 'l', col = col[j], ylim = ylim,
         ylab = '', cex = 0.6, xlab = 'Date')
  }
  legend('bottomright', legend = c('observation', model.names),
         col = c('black', col), lty = 1, cex = 0.8)
}

plot.model.comparison(daily.testing.data$date, daily.testing.data$pressure,
                      list(gam.prediction$pressure,
                           arma.garch.prediction[[1]]$prediction,
                           varx.prediction[[1]]$fcst,
                           armax.prediction[[1]]$prediction),
                      c(970, 1020), 'Pressure Prediction', 'Pressure')

# BUG FIX: these two plots were titled/labeled 'Pressure Prediction'/'Pressure'
# in the original even though they show temperature and humidity.
plot.model.comparison(daily.testing.data$date, daily.testing.data$temperature,
                      list(gam.prediction$temperature,
                           arma.garch.prediction[[2]]$prediction,
                           varx.prediction[[2]]$fcst,
                           armax.prediction[[2]]$prediction),
                      c(264, 289), 'Temperature Prediction', 'Temperature')

plot.model.comparison(daily.testing.data$date, daily.testing.data$humidity,
                      list(gam.prediction$humidity,
                           arma.garch.prediction[[3]]$prediction,
                           varx.prediction[[3]]$fcst,
                           armax.prediction[[3]]$prediction),
                      c(1, 6), 'Humidity Prediction', 'Humidity')

Forecasting Measures

MAE

# Mean absolute error of each model's forecasts over the test set,
# computed per weather component (pressure, temperature, humidity).
obs <- daily.testing.data[, 2:4]

# Assemble each model's point forecasts into a 3-column matrix
# (same column order as obs).
arma.garch.pred <- cbind(arma.garch.prediction[[1]]$prediction,
                         arma.garch.prediction[[2]]$prediction,
                         arma.garch.prediction[[3]]$prediction)
varx.pred <- cbind(varx.prediction[[1]]$fcst,
                   varx.prediction[[2]]$fcst,
                   varx.prediction[[3]]$fcst)
armax.pred <- cbind(armax.prediction[[1]]$prediction,
                    armax.prediction[[2]]$prediction,
                    armax.prediction[[3]]$prediction)

gam.mae <- colMeans(abs(obs - gam.prediction[, 2:4]))
arma.garch.mae <- colMeans(abs(obs - arma.garch.pred))
varx.mae <- colMeans(abs(obs - varx.pred))
armax.mae <- colMeans(abs(obs - armax.pred))

mae <- rbind(gam.mae, arma.garch.mae, varx.mae, armax.mae)
rownames(mae) <- c('Nonparametric Trend + ANOVA Seasonality',
                   'ARMA-GARCH', 'VARX', 'ARMAX')
kable(mae, align = 'c', row.names = TRUE)
pressure temperature humidity
Nonparametric Trend + ANOVA Seasonality 9.073658 3.700714 0.9060578
ARMA-GARCH 9.365320 3.781821 1.0519441
VARX 8.765609 3.587084 0.8670008
ARMAX 9.203988 3.225209 0.8150322

PM

# Precision measure (PM) per weather component: sum of squared forecast
# errors divided by the total sum of squares of the test observations.
test.obs <- daily.testing.data[, 2:4]
ss.total <- colSums((test.obs - colMeans(test.obs))^2)

# PM for one model given its 3-column matrix/data frame of point forecasts.
pm.of <- function(pred) {
  colSums((pred - test.obs)^2) / ss.total
}

gam.pm <- pm.of(gam.prediction[, 2:4])
arma.garch.pm <- pm.of(cbind(arma.garch.prediction[[1]]$prediction,
                             arma.garch.prediction[[2]]$prediction,
                             arma.garch.prediction[[3]]$prediction))
varx.pm <- pm.of(cbind(varx.prediction[[1]]$fcst,
                       varx.prediction[[2]]$fcst,
                       varx.prediction[[3]]$fcst))
armax.pm <- pm.of(cbind(armax.prediction[[1]]$prediction,
                        armax.prediction[[2]]$prediction,
                        armax.prediction[[3]]$prediction))

pm <- rbind(gam.pm, arma.garch.pm, varx.pm, armax.pm)
rownames(pm) <- c('Nonparametric Trend + ANOVA Seasonality',
                  'ARMA-GARCH', 'VARX', 'ARMAX')
kable(pm, align = 'c', row.names = TRUE)
pressure temperature humidity
Nonparametric Trend + ANOVA Seasonality 0.0002111 0.0000954 3.2e-06
ARMA-GARCH 0.0002437 0.0001049 4.6e-06
VARX 0.0002203 0.0000934 3.2e-06
ARMAX 0.0002367 0.0000813 2.7e-06